/*****************************************************************
* Do File: all_figs.do
*
* Purpose:
*   This file takes as input the clean_census_tract.dta, 
*   metro_level.dta and popweight.dta datasets (all read from 
*   $output). It then produces several figures illustrating 
*   various patterns of segregation and inequality.
*
* Figures Created:
*   fig-1:  Theil Index over time
*   fig-3:  Inequality and Segregation over time
*   fig-4:  Segregation and Inequality for different samples
*   fig-5:  Scatter plot of Gini and Dissim, levels and differences
*   fig-16: Segregation for different income cutoffs
*
*****************************************************************/



********************************************************************************
* Fig1: Theil Index Calculations : At the Census Level
********************************************************************************

****************
** All families
***************

* ------------------------------------------------------------------------------
* Fig-1: Theil index of family income, decomposed into a within-tract and an
*        across-tract component, then averaged over metros using the
*        population weights in popweight.dta.
* Reads : $output/clean_census_tract.dta, $output/popweight.dta
* ------------------------------------------------------------------------------
use "$output/clean_census_tract.dta", clear
keep if ind_base==1
drop if f_count_ct == .
keep year metro countyfips fips bracket_no f_count_ct income

* One row per year x metro x tract (fips) x income bracket
collapse (sum) f_count_ct, by(year metro fips bracket_no income)

* fk = share of the tract's families that fall in each bracket
bysort year metro fips (income) :  egen hhs = total(f_count_ct)
gen fk = f_count_ct / hhs
gen inc_weight = income * fk

* mu = tract mean income (share-weighted sum of bracket incomes)
bysort year metro fips (income) :  egen mu = total(inc_weight)

* Bracket-level contribution to the tract's Theil index
gen theil = fk * income / mu * log(income/mu)

* Tract level: summing inc_weight reproduces the tract mean, hence the rename
collapse (sum) theil f_count_ct inc_weight, by(year metro fips)
rename inc_weight  mu
* collapse (sum) stores 0 rather than missing, so this is only a safeguard
keep if theil ~= .

* Metro totals: families (hhs) and family-weighted mean income (new_mu)
bysort year metro (fips):  egen hhs = total(f_count_ct)
bysort year metro (fips) :  egen new_mu = total(mu*f_count_ct)
replace new_mu = new_mu/hhs
* si = tract's share of total metro income
gen si = f_count_ct/hhs*mu/new_mu

gen within = si * theil
gen across = si * log(mu / new_mu)
collapse (sum) within across, by(year metro)

* The data are now unique on year x metro, and popweight.dta is merged 1:1 on
* the same keys further down in this file, so use 1:1 here as well: it makes
* Stata stop if either side ever contains duplicate keys instead of silently
* duplicating metros. Keep matched metros only, as in the other sections.
merge 1:1 year metro using "$output/popweight.dta"
keep if _merge == 3
drop _merge

collapse (mean) within across [aweight = population], by(year)
gen Theil = within + across

twoway ///
    (connected within year, lpattern(dash) lcolor(maroon) mcolor(maroon)) ///
    (connected across year, lpattern(dash) lcolor(green) mcolor(green)) ///
    (connected Theil year, lcolor(navy) mcolor(navy)), ///
    xtitle("") ///
    ytitle(`"{fontface "Palatino Linotype":Theil Index}"', color(navy)) ///
    legend(lab(1 `"{fontface "Palatino Linotype":Within}"') ///
           lab(2 `"{fontface "Palatino Linotype":Across}"') ///
           lab(3 `"{fontface "Palatino Linotype": Total}"')) ///
    graphregion(color(white)) bgcolor(white) ///
    ylabel(0(0.05)0.35, axis(1) nogrid)




********************************************************************************
* Fig-3: Inequality and Segregation over time 
********************************************************************************



* Fig-3: population-weighted yearly means of the metro-level *_weighted and
* *dissim measures; the all-families series of each are drawn on separate
* y-axes (axis 1 = inequality, axis 2 = segregation).
use "$output/metro_level.dta", clear
keep year population *dissim *_weighted
collapse (mean) *dissim *_weighted [aweight = population], by(year)

twoway ///
    (connected all_fam_weighted year, ///
        yaxis(1) lpattern(dash) lcolor(navy) mcolor(navy)) ///
    (connected allfamdissim year, ///
        yaxis(2) lpattern(dash) lcolor(maroon) mcolor(maroon)), ///
    ytitle(`"{fontface "Palatino Linotype":Inequality}"', axis(1) color(navy)) ///
    ytitle(`"{fontface "Palatino Linotype":Segregation}"', axis(2) color(maroon)) ///
    ylabel(0.36(0.02)0.46, axis(1) nogrid) ///
    ylabel(0.30(0.02)0.40, axis(2) nogrid) ///
    xtitle("") ///
    legend(order(1 `"{fontface "Palatino Linotype":Inequality}"' ///
                 2 `"{fontface "Palatino Linotype":Segregation}"') ///
           position(6) ring(0) rows(1)) ///
    graphregion(color(white)) bgcolor(white)
	
	
	
***************************************************
** Figure 4a : Dissim for different Samples 
*****************************************************
** Dissimilarity Index for different samples 

* Fig-4a: population-weighted yearly mean of the dissimilarity index for
* all families, families with children and families without children.
use "$output/metro_level.dta", clear
collapse (mean) allfamdissim famkidsdissim famnokidsdissim ///
    [aweight = population], by(year)

twoway ///
    (connected allfamdissim year, lpattern(dash) lcolor(maroon) mcolor(maroon)) ///
    (connected famkidsdissim year, lcolor(navy) mcolor(navy)) ///
    (connected famnokidsdissim year, lpattern(dash) lcolor(green) mcolor(green)), ///
    xtitle("") ///
    ytitle(`"{fontface "Palatino Linotype":Segregation}"', color(navy)) ///
    legend(lab(1 `"{fontface "Palatino Linotype":All Families}"') ///
           lab(2 `"{fontface "Palatino Linotype":Families with children}"') ///
           lab(3 `"{fontface "Palatino Linotype": Families without children}"')) ///
    graphregion(color(white)) bgcolor(white) ///
    ylabel(0.30(0.03)0.48, axis(1) nogrid)





***************************************************
** Figure 4b : Gini  for different Samples 
*****************************************************
* Fig-4b: yearly mean of the metro-level *_weighted inequality measures for
* all families, families with children and families without children.
*
* The yearly means are weighted by metro population, exactly as in Fig-3
* (which plots the same all_fam_weighted series) and Fig-4a. Without the
* weight, all_fam_weighted would take different values in Fig-3 and Fig-4b.
* NOTE(review): if an unweighted mean was intended here, drop the weight and
* say so explicitly.
use "$output/metro_level.dta", clear
keep year population *_weighted
collapse (mean) *_weighted [aweight = population], by(year)

twoway ///
    (connected all_fam_weighted year, lpattern(dash) lcolor(maroon) mcolor(maroon)) ///
    (connected famkids_weighted year, lcolor(navy) mcolor(navy)) ///
    (connected fam_nokids_weighted year, lpattern(dash) lcolor(green) mcolor(green)), ///
    xtitle("") ///
    ytitle(`"{fontface "Palatino Linotype":Inequality}"', color(navy)) ///
    legend(lab(1 `"{fontface "Palatino Linotype":All Families}"') ///
           lab(2 `"{fontface "Palatino Linotype":Families with children}"') ///
           lab(3 `"{fontface "Palatino Linotype": Families without children}"')) ///
    graphregion(color(white)) bgcolor(white) ///
    ylabel(0.36(0.03)0.48, axis(1) nogrid)



***************************************************
** Figure 5 : Scatterplots Dissim and Gini Levels and Differences
*****************************************************
** Metro Name: metro (code)
** Chicago: 16974
** Los Angeles: 31084
** New York: 35644




* Fig-5: metro-level scatter plots of inequality (allfamgini) against
* segregation (allfamdissim): 1980 levels, and 30-year forward changes
* measured from 1980.
use "$output/metro_level.dta", clear
keep year metro population allfamdissim allfamgini

* Declare the panel so the F30. lead operator can be used
xtset metro year

* Every variable other than the identifiers and the weight is an outcome
ds year metro population, not
local outcomes `r(varlist)'
foreach outcome of local outcomes {
	* value 30 years ahead minus the current value
	gen d30_`outcome' = F30.`outcome' - `outcome'
}

* The 1980 rows carry both the 1980 level and the change from 1980
keep if year==1980

label variable allfamdissim "Segregation (1980)"
label variable allfamgini "Inequality (1980)"

** Levels, all families: weighted markers with a fitted line
twoway ///
    (scatter allfamgini allfamdissim [w=population], msymbol(Oh)) ///
    (lfit allfamgini allfamdissim), ///
    title("Levels: All Families") ///
    xtitle("Segregation 1980") ///
    ytitle("Inequality 1980") ///
    xlabel(0(0.10)0.60) ///
    ylabel(0.30(0.05)0.50) ///
    legend(off)

** Changes, all families: weighted markers with a fitted line
twoway ///
    (scatter d30_allfamgini d30_allfamdissim [w=population], msymbol(Oh)) ///
    (lfit d30_allfamgini d30_allfamdissim), ///
    title("Changes: All Families") ///
    xtitle("Change Segregation 1980-2010") ///
    ytitle("Change Inequality 1980-2010") ///
    xlabel(-0.15(0.05)0.25) ///
    ylabel(-0.02(0.02)0.12) ///
    legend(off)






***************************************************
** Figure 16 : Dissimilarity for different cutoffs
*****************************************************






********************************************************************************
**** All Families (Top 50%)
********************************************************************************
********************************************************************************
**** Fig-16, all families: dissimilarity index between the families in the
**** top X% of the metro income distribution and all other families,
**** for X = 50, 20 and 10.
****
**** The three cutoffs run through one loop; the only thing that differs
**** between them is the percentile used to pick the cutoff bracket.
**** Reads : $output/clean_census_tract.dta, $output/popweight.dta
********************************************************************************

* Tract x bracket family counts for the base sample. Built once and reused
* for every cutoff.
use "$output/clean_census_tract.dta", clear
keep if ind_base==1
drop if f_count_ct == .
keep year metro countyfips fips bracket_no f_count_ct
tempfile tracts
save `tracts'

foreach top in 50 20 10 {

	use `tracts', clear

	* Metro-level distribution of families over income brackets
	collapse (sum) f_count_ct, by(year metro bracket_no)
	bysort year metro (bracket_no): egen metro_fam = total(f_count_ct)
	gen share = f_count_ct / metro_fam

	* Cumulative share in ascending bracket order
	* (assumes bracket_no increases with income -- TODO confirm)
	bysort year metro (bracket_no): gen cdf = sum(share)

	* Bracket whose cumulative share is closest to the (100 - top)th
	* percentile, e.g. the 80th percentile when top = 20
	gen dist = abs(cdf - (100 - `top')/100)
	bysort year metro (bracket_no): egen mindist = min(dist)
	gen is_cut = mindist == dist

	* Walk the brackets from the highest bracket_no downwards: id = 1 for
	* every bracket strictly above the cutoff bracket, 0 for the cutoff
	* bracket itself and everything below it
	gen negbrack = -bracket_no
	bysort year metro (negbrack): gen n_cut = sum(is_cut)
	gen id = n_cut == 0
	keep year metro bracket_no id

	* Attach the metro-level group indicator to the tract-level counts
	merge 1:m year metro bracket_no using `tracts'

	* Families in each group by tract, and each group's metro total
	collapse (sum) f_count_ct, by(year metro fips id)
	bysort year metro id: egen denom = total(f_count_ct)

	* Signed tract shares (negative for id == 1, positive for id == 0), so the
	* tract sum is the difference between the two groups' shares; the index
	* is half the sum of the absolute differences over tracts
	gen ratio = f_count_ct / denom * (1 - 2*(id == 1))
	collapse (sum) ratio, by(year metro fips)
	replace ratio = abs(ratio)/2
	collapse (sum) ratio, by(year metro)

	* Population-weighted mean across metros, matched metros only
	merge 1:1 year metro using "$output/popweight.dta"
	keep if _merge ==3
	drop _merge

	collapse (mean) ratio [aweight=population], by(year)
	rename ratio Top`top'
	label var Top`top' "Rich Top `top'%"

	tempfile seg`top'
	save `seg`top''
}

********************************************************************************
**** Merging the datasets together
********************************************************************************

use `seg10', clear

merge 1:1 year using `seg20'
keep if _merge == 3
drop _merge

merge 1:1 year using `seg50'
keep if _merge == 3
drop _merge

twoway ///
    (connected Top50 year, lpattern(dash) lcolor(green) mcolor(green)) ///
    (connected Top20 year, lpattern(dash) lcolor(maroon) mcolor(maroon)) ///
    (connected Top10 year, lcolor(navy) mcolor(navy)), ///
    xtitle("") ///
    ytitle(`"{fontface "Palatino Linotype":Segregation}"', color(navy)) ///
    legend(lab(1 `"{fontface "Palatino Linotype":Rich Top 50%}"') ///
           lab(2 `"{fontface "Palatino Linotype":Rich Top 20%}"') ///
           lab(3 `"{fontface "Palatino Linotype": Rich Top 10%}"')) ///
    graphregion(color(white)) bgcolor(white) ///
    ylabel(, axis(1) nogrid)




